# load library
library(tidyverse)
## -- Attaching packages ---------------------------------------------- tidyverse 1.3.0 --
## ✓ ggplot2 3.2.1 ✓ purrr 0.3.2
## ✓ tibble 2.1.3 ✓ dplyr 0.8.3
## ✓ tidyr 1.0.0 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## -- Conflicts ------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(ggplot2)
library(gridExtra) # use to put graphs together in the same frame
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(scales) # use to improve colors
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
library(janitor) # tabyl() / adorn_totals() cross-tabulation tables
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(sf) # possible geomap
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
library(kableExtra) # clean table design
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(GGally) #used to display ggpairs
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
##
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
##
## nasa
library(knitr)
library(treemapify)
library(ggthemes)
library(shiny)
library(vctrs)
library(MultNonParam) # hypothesis testing for median
library(ggforce)
library(cowplot)
##
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
## default ggplot2 theme anymore. To recover the previous
## behavior, execute:
## theme_set(theme_cowplot())
## ********************************************************
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggthemes':
##
## theme_map
library(egg)
library(formattable)
##
## Attaching package: 'formattable'
## The following objects are masked from 'package:scales':
##
## comma, percent, scientific
library(inspectdf) # New package employed for base EDA
#library(DataExplorer)
#library(treemap)
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
library(mapdata)
library(maptools)
## Loading required package: sp
## Checking rgeos availability: TRUE
library(mapproj)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:formattable':
##
## area
## The following object is masked from 'package:dplyr':
##
## select
library(RgoogleMaps)
library(RColorBrewer)
library(plotGoogleMaps)
## Loading required package: spacetime
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
library(shiny)
library(leaflet)
# Install the "tidyup" development branch of ggmap from GitHub only when ggmap
# is not installed yet, so re-running the analysis does not contact GitHub
# (or fail when offline) on every run.
if (!requireNamespace("ggmap", quietly = TRUE)) {
  devtools::install_github("dkahle/ggmap", ref = "tidyup")
}
## Skipping install of 'ggmap' from a github remote, the SHA1 (2d756e5e) has not changed since last install.
## Use `force = TRUE` to force installation
# data import
# stringsAsFactors is set explicitly: the output below (str(), summary(),
# level counts) relies on text columns being factors, which is only the
# read.csv() default before R 4.0.0.
crime <- read.csv("crime.csv", stringsAsFactors = TRUE)
Crime data for Vancouver (Canada) from 2003 to 2017.
The data was obtained from kaggle.com and originally comes from the Vancouver Open Data Catalogue (extracted in 2017-2018); it contains 530652 records between 2003-01-01 and 2017-07-13.
# Quick exploratory overview with inspectdf; each pipeline renders one plot.

# Column types
crime %>%
  inspect_types() %>%
  show_plot()

# Categorical columns
crime %>%
  inspect_cat() %>%
  show_plot()

# Numeric columns (called with parentheses, consistent with the other steps)
crime %>%
  inspect_num() %>%
  show_plot()

# Correlations between numeric columns
crime %>%
  inspect_cor() %>%
  show_plot()
#crime %>%
# plot_intro()
#crime %>% plot_bar()
#crime %>% plot_histogram()
#crime %>% plot_correlation(maxcat = 5L)
# Preview the first ten records
crime %>% head(n = 10)
## TYPE YEAR MONTH DAY HOUR MINUTE
## 1 Other Theft 2003 5 12 16 15
## 2 Other Theft 2003 5 7 15 20
## 3 Other Theft 2003 4 23 16 40
## 4 Other Theft 2003 4 20 11 15
## 5 Other Theft 2003 4 12 17 45
## 6 Other Theft 2003 3 26 20 45
## 7 Break and Enter Residential/Other 2003 3 10 12 0
## 8 Mischief 2003 6 28 4 13
## 9 Other Theft 2003 2 16 9 2
## 10 Break and Enter Residential/Other 2003 7 9 18 15
## HUNDRED_BLOCK NEIGHBOURHOOD X Y Latitude Longitude
## 1 9XX TERMINAL AVE Strathcona 493906.5 5457452 49.26980 -123.0838
## 2 9XX TERMINAL AVE Strathcona 493906.5 5457452 49.26980 -123.0838
## 3 9XX TERMINAL AVE Strathcona 493906.5 5457452 49.26980 -123.0838
## 4 9XX TERMINAL AVE Strathcona 493906.5 5457452 49.26980 -123.0838
## 5 9XX TERMINAL AVE Strathcona 493906.5 5457452 49.26980 -123.0838
## 6 9XX TERMINAL AVE Strathcona 493906.5 5457452 49.26980 -123.0838
## 7 63XX WILTSHIRE ST Kerrisdale 489325.6 5452818 49.22805 -123.1466
## 8 40XX W 19TH AVE Dunbar-Southlands 485903.1 5455884 49.25556 -123.1937
## 9 9XX TERMINAL AVE Strathcona 493906.5 5457452 49.26980 -123.0838
## 10 18XX E 3RD AVE Grandview-Woodland 495078.2 5457221 49.26773 -123.0677
Data looks tidy, ready to start analysis
# Column types and a sample of values for every variable
crime %>% str()
## 'data.frame': 530652 obs. of 12 variables:
## $ TYPE : Factor w/ 11 levels "Break and Enter Commercial",..: 6 6 6 6 6 6 2 4 6 2 ...
## $ YEAR : int 2003 2003 2003 2003 2003 2003 2003 2003 2003 2003 ...
## $ MONTH : int 5 5 4 4 4 3 3 6 2 7 ...
## $ DAY : int 12 7 23 20 12 26 10 28 16 9 ...
## $ HOUR : int 16 15 16 11 17 20 12 4 9 18 ...
## $ MINUTE : int 15 20 40 15 45 45 0 13 2 15 ...
## $ HUNDRED_BLOCK: Factor w/ 21205 levels ""," / 3888 W 50TH AVE",..: 14372 14372 14372 14372 14372 14372 11454 8420 14372 2284 ...
## $ NEIGHBOURHOOD: Factor w/ 25 levels "","Arbutus Ridge",..: 21 21 21 21 21 21 9 4 21 6 ...
## $ X : num 493907 493907 493907 493907 493907 ...
## $ Y : num 5457452 5457452 5457452 5457452 5457452 ...
## $ Latitude : num 49.3 49.3 49.3 49.3 49.3 ...
## $ Longitude : num -123 -123 -123 -123 -123 ...
# Descriptive statistics for every column
summary(crime)
## TYPE YEAR MONTH
## Theft from Vehicle :172700 Min. :2003 Min. : 1.000
## Mischief : 70413 1st Qu.:2005 1st Qu.: 4.000
## Break and Enter Residential/Other: 60862 Median :2009 Median : 6.000
## Offence Against a Person : 54142 Mean :2009 Mean : 6.451
## Other Theft : 52167 3rd Qu.:2013 3rd Qu.: 9.000
## Theft of Vehicle : 38418 Max. :2017 Max. :12.000
## (Other) : 81950
## DAY HOUR MINUTE
## Min. : 1.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 8.00 1st Qu.: 9.00 1st Qu.: 0.00
## Median :15.00 Median :15.00 Median :10.00
## Mean :15.41 Mean :13.71 Mean :16.94
## 3rd Qu.:23.00 3rd Qu.:19.00 3rd Qu.:30.00
## Max. :31.00 Max. :23.00 Max. :59.00
## NA's :54362 NA's :54362
## HUNDRED_BLOCK NEIGHBOURHOOD
## OFFSET TO PROTECT PRIVACY: 54362 Central Business District:110947
## 7XX GRANVILLE ST : 4629 : 56624
## 6XX GRANVILLE ST : 3151 West End : 41352
## 7XX W GEORGIA ST : 2427 Fairview : 32161
## X NK_LOC ST : 2244 Mount Pleasant : 30536
## 6XX W 41ST AVE : 1900 Grandview-Woodland : 27180
## (Other) :461939 (Other) :231852
## X Y Latitude Longitude
## Min. : 0 Min. : 0 Min. : 0.00 Min. :-124.5
## 1st Qu.:489945 1st Qu.:5453651 1st Qu.:49.24 1st Qu.:-123.1
## Median :491499 Median :5456840 Median :49.26 Median :-123.1
## Mean :441802 Mean :4897663 Mean :44.22 Mean :-110.5
## 3rd Qu.:493547 3rd Qu.:5458638 3rd Qu.:49.28 3rd Qu.:-123.1
## Max. :511303 Max. :5512579 Max. :49.76 Max. : 0.0
##
Data observations
Potential research questions / issues
Make a graph and examine for each variable individually
# YEAR: number of recorded incidents per year
crime %>%
  ggplot(aes(x = YEAR)) +
  geom_line(stat = "count") +
  theme_classic() +
  labs(
    title = "Crime Incidents by Year",
    x = "Year",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  )
Comments
- Number of crimes decreases until 2011
- Starts and continues to increase from 2014 (2017 has only records up to July 13th)
# MONTH: number of recorded incidents per calendar month
ggplot(crime, aes(x = MONTH)) +
  geom_line(stat = "count") +
  # MONTH is an integer 1-12; without explicit breaks the continuous axis
  # picks fractional tick marks, so show one labelled tick per month.
  scale_x_continuous(breaks = 1:12, labels = month.abb) +
  theme_classic() +
  labs(
    title = "Crime Incidents by Month",
    x = "Month",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  )
Comments
- Number of crimes relatively evenly distributed across months
- February has slightly lower crime incidents
- Slight decline from Oct to Dec
# DAY: number of recorded incidents per day of the month
crime %>%
  ggplot(aes(x = DAY)) +
  geom_line(stat = "count") +
  theme_classic() +
  labs(
    title = "Crime Incidents by Day",
    x = "Day",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  )
Comments
- Crime rate is well distributed across days
- 1st and 15th day of month have higher crime incidents
- The rate slightly decreases toward end of month
- The count for the 31st is noticeably lower than for other days because only seven months of the year have 31 days.
# HOUR: number of recorded incidents per hour of the day
# 54362 records have no HOUR; they are dropped from the count with a warning
crime %>%
  ggplot(aes(x = HOUR)) +
  geom_line(stat = "count") +
  theme_classic() +
  labs(
    title = "Crime Incidents by Hour",
    x = "Hour",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  )
## Warning: Removed 54362 rows containing non-finite values (stat_count).
Comments
- Three significant peaks, at hours 0, 12, and 18
- Substantial decline from 0 to 1 hour and continual decrease between 1 - 5 hour
- Rate growth until 18 hour (max peak)
Questions
- Contrary to the common perception of when crime happens, the daytime has a high number of incidents. In contrast, the early morning (after midnight until 6am) has the lowest number of the whole day. What are potential causes for this?
- Do benign and severe crimes share the same hourly trend?
# MINUTE: number of recorded incidents per minute of the hour
# 54362 records are N/A for minute (same as hour); they are dropped from the
# count with a warning
ggplot(crime, aes(x = MINUTE)) +
  geom_line(stat = "count") +
  theme_classic() +
  labs(
    title = "Crime Incidents by Minute",
    x = "Minute", # title case, consistent with the other axis labels
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  )
## Warning: Removed 54362 rows containing non-finite values (stat_count).
Comments
- While the other minutes are evenly distributed, 0 and 30 min have remarkably high crime counts
- There are also relatively higher counts at the first and third quarters of the hour (15 and 45 min)
# TYPE: incidents per crime type, drawn as horizontal bars (axes flipped)
ggplot(crime, aes(x = TYPE)) +
  geom_bar() +
  theme_classic() +
  labs(
    title = "Crime Incidents by Type",
    x = "Type of Crime",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  ) +
  coord_flip()
Comments
- The most frequent type of crime is "Theft from Vehicle"
- Very few incidents of "Vehicle Collision or Pedestrian Struck (with Fatality)" and "Homicide"
Questions
- Split into groups of benign and severe crime? Would it give a different look into the data?
# NEIGHBOURHOOD: incidents per neighbourhood, drawn as horizontal bars
# (axes flipped).
# Records with an empty NEIGHBOURHOOD would otherwise be drawn as a bar with
# no label, which is easy to misread as belonging to the adjacent
# neighbourhood; give them an explicit label for the plot only.
crime %>%
  mutate(
    NEIGHBOURHOOD = as.character(NEIGHBOURHOOD),
    NEIGHBOURHOOD = if_else(
      NEIGHBOURHOOD == "",
      "(Not recorded)",
      NEIGHBOURHOOD
    )
  ) %>%
  ggplot(aes(x = NEIGHBOURHOOD)) +
  geom_bar() +
  theme_classic() +
  labs(
    title = "Crime Incidents by Neighbourhood",
    x = "Neighbourhood",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  ) +
  coord_flip()
Comments
- 3 groups have a notably higher number of crimes: Central Business District (110947), records with no neighbourhood recorded (56624), and West End (41352). Arbutus Ridge itself has only 6066 records.
- Musqueam has very low number of crimes
- Crime rate is spread over among other neighbourhoods within certain range
# Pull each location-related column out as a plain vector for the
# summaries further down.
crime_x <- crime[["X"]]
crime_y <- crime[["Y"]]
crime_h <- crime[["HUNDRED_BLOCK"]]
crime_Lt <- crime[["Latitude"]]
crime_Lg <- crime[["Longitude"]]
# Most frequent X coordinates, largest count first
crime %>% count(X, sort = TRUE)
## # A tibble: 95,502 x 2
## X n
## <dbl> <int>
## 1 0 54362
## 2 491446. 2502
## 3 492757. 2191
## 4 491399. 1738
## 5 497309. 1629
## 6 495037. 1457
## 7 491295. 1252
## 8 492934. 1232
## 9 491408. 1101
## 10 491401. 1095
## # … with 95,492 more rows
# Most frequent Y coordinates, largest count first
crime %>% count(Y, sort = TRUE)
## # A tibble: 93,544 x 2
## Y n
## <dbl> <int>
## 1 0 54362
## 2 5458896. 2502
## 3 5458792. 2191
## 4 5458862. 1738
## 5 5456156. 1654
## 6 5456614. 1457
## 7 5458745. 1252
## 8 5456669. 1232
## 9 5458872. 1101
## 10 5458865. 1095
## # … with 93,534 more rows
# Most frequent hundred-blocks, largest count first
crime %>% count(HUNDRED_BLOCK, sort = TRUE)
## # A tibble: 21,205 x 2
## HUNDRED_BLOCK n
## <fct> <int>
## 1 OFFSET TO PROTECT PRIVACY 54362
## 2 7XX GRANVILLE ST 4629
## 3 6XX GRANVILLE ST 3151
## 4 7XX W GEORGIA ST 2427
## 5 X NK_LOC ST 2244
## 6 6XX W 41ST AVE 1900
## 7 3XX ABBOTT ST 1795
## 8 31XX GRANDVIEW HWY 1784
## 9 11XX ROBSON ST 1758
## 10 17XX E BROADWAY AVE 1718
## # … with 21,195 more rows
# Most frequent latitudes, largest count first
crime %>% count(Latitude, sort = TRUE)
## # A tibble: 101,885 x 2
## Latitude n
## <dbl> <int>
## 1 0 54362
## 2 49.3 2502
## 3 49.3 2191
## 4 49.3 1738
## 5 49.3 1629
## 6 49.3 1457
## 7 49.3 1252
## 8 49.3 1232
## 9 49.3 1101
## 10 49.3 1095
## # … with 101,875 more rows
# Most frequent longitudes, largest count first
crime %>% count(Longitude, sort = TRUE)
## # A tibble: 98,972 x 2
## Longitude n
## <dbl> <int>
## 1 0 54362
## 2 -123. 2502
## 3 -123. 2191
## 4 -123. 1738
## 5 -123. 1629
## 6 -123. 1459
## 7 -123. 1252
## 8 -123. 1232
## 9 -123. 1101
## 10 -123. 1095
## # … with 98,962 more rows
# Five-number summary and mean of the X coordinate
crime_x %>% summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 489945 491499 441802 493547 511303
# Five-number summary and mean of the Y coordinate
crime_y %>% summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 5453651 5456840 4897663 5458638 5512579
# Counts for the most frequent hundred-blocks; the rest are pooled as "(Other)"
crime_h %>% summary()
## OFFSET TO PROTECT PRIVACY 7XX GRANVILLE ST 6XX GRANVILLE ST
## 54362 4629 3151
## 7XX W GEORGIA ST X NK_LOC ST 6XX W 41ST AVE
## 2427 2244 1900
## 3XX ABBOTT ST 31XX GRANDVIEW HWY 11XX ROBSON ST
## 1795 1784 1758
## 17XX E BROADWAY AVE 5XX RICHARDS ST 3XX E BROADWAY AVE
## 1718 1462 1437
## 5XX W HASTINGS ST 23XX CAMBIE ST 1XX WATER ST
## 1424 1306 1249
## 10XX BURRARD ST 3XX SE MARINE DR 7XX DUNSMUIR ST
## 1162 1139 1113
## 9XX SEYMOUR ST 34XX KINGSWAY AVE 8XX AVISON WAY
## 1101 1071 1048
## 1XX W GEORGIA ST 11XX DAVIE ST 8X W PENDER ST
## 1021 1012 1004
## 16XX DAVIE ST 6XX RICHARDS ST 12XX GRANVILLE ST
## 979 973 953
## 9XX BURRARD ST 5XX SEYMOUR ST 7XX PACIFIC BLVD
## 946 944 918
## 10XX ROBSON ST 33XX KINGSWAY AVE 11XX W HASTINGS ST
## 915 864 844
## 15XX COMMERCIAL DR 10XX BEACH AVE 25XX E HASTINGS ST
## 833 821 819
## 11XX HOWE ST 18XX E HASTINGS ST 1XX W PENDER ST
## 795 776 771
## 14XX ANDERSON ST 8XX BURRARD ST 1XX E HASTINGS ST
## 756 740 732
## 12XX ROBSON ST 4XX SEYMOUR ST 5XX GRANVILLE ST
## 719 716 715
## 5XX ABBOTT ST 3XX W GEORGIA ST 14XX QUEBEC ST
## 695 673 664
## 1XX KEEFER ST 11XX ALBERNI ST 14XX ROBSON ST
## 652 648 645
## 11XX GRANVILLE ST 10XX GRANVILLE ST 35XX GRANDVIEW HWY
## 637 635 627
## 2XX E GEORGIA ST 12XX RICHARDS ST 6XX SEYMOUR ST
## 624 619 618
## 7XX ROBSON ST 17XX ROBSON ST 9XX TERMINAL AVE
## 618 615 610
## 7XX BURRARD ST 7XX BUTE ST 5XX W BROADWAY AVE
## 601 596 592
## 9XX GRANVILLE ST 10XX SEYMOUR ST 8XX W BROADWAY AVE
## 586 585 583
## 56XX VICTORIA DR 8XX SEYMOUR ST 6XX W BROADWAY AVE
## 581 570 562
## 8XX GRANVILLE ST 1XX W BROADWAY AVE 10XX W GEORGIA ST
## 562 555 554
## 8XX HAMILTON ST 5XX W 12TH AVE 9XX W CORDOVA ST
## 546 541 538
## 5XX HORNBY ST 3XX WATER ST 5XX BEATTY ST
## 530 525 510
## 10XX ALBERNI ST 10XX HOMER ST 56XX STANLEY PARK DR
## 509 508 492
## 1XX W CORDOVA ST 10XX HOWE ST 7XX SEYMOUR ST
## 490 482 481
## 8XX W 12TH AVE 13XX ROBSON ST 55XX CAMBIE ST
## 471 470 469
## 1XX E PENDER ST BUTE ST / ROBSON ST 12XX DAVIE ST
## 467 463 462
## SEYMOUR ST / W PENDER ST 12XX HORNBY ST 16XX JOHNSTON ST
## 462 461 459
## 11XX MELVILLE ST 11XX BURRARD ST 5XX W 8TH AVE
## 453 452 451
## 1XX E CORDOVA ST 8XX HOMER ST 11XX HARWOOD ST
## 449 444 438
## (Other)
## 391276
# Five-number summary and mean of the latitude
crime_Lt %>% summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 49.24 49.26 44.22 49.28 49.76
# Five-number summary and mean of the longitude
crime_Lg %>% summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -124.5 -123.1 -123.1 -110.5 -123.1 0.0
Comments
- 54362 incidents are missing all location information (X, Y, Latitude and Longitude are 0, and HUNDRED_BLOCK is "OFFSET TO PROTECT PRIVACY")
- X/Y and Latitude/Longitude share nearly the same list of counts per location
Do all crime types share the same trend through time?
# Cross-tabulate YEAR against crime TYPE, with row and column totals
crime %>%
  tabyl(YEAR, TYPE) %>%
  adorn_totals(c("row", "col")) %>%
  knitr::kable()
| YEAR | Break and Enter Commercial | Break and Enter Residential/Other | Homicide | Mischief | Offence Against a Person | Other Theft | Theft from Vehicle | Theft of Bicycle | Theft of Vehicle | Vehicle Collision or Pedestrian Struck (with Fatality) | Vehicle Collision or Pedestrian Struck (with Injury) | Total |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 | 3197 | 6883 | 18 | 6391 | 3507 | 2582 | 17744 | 1418 | 6361 | 24 | 1803 | 49928 |
| 2004 | 3283 | 6538 | 22 | 5601 | 3804 | 2605 | 18204 | 1230 | 6102 | 22 | 1868 | 49279 |
| 2005 | 2639 | 5542 | 22 | 5062 | 3771 | 2611 | 16554 | 1416 | 5031 | 32 | 1984 | 44664 |
| 2006 | 2844 | 5674 | 17 | 5184 | 4350 | 2966 | 14734 | 1467 | 3682 | 20 | 1384 | 42322 |
| 2007 | 2436 | 4996 | 19 | 4810 | 4412 | 3024 | 12226 | 1203 | 3305 | 24 | 1237 | 37692 |
| 2008 | 2224 | 4432 | 18 | 5276 | 4226 | 3142 | 11298 | 1176 | 2420 | 17 | 1185 | 35414 |
| 2009 | 1858 | 3497 | 18 | 4430 | 3885 | 3662 | 10007 | 1641 | 1882 | 14 | 1278 | 32172 |
| 2010 | 1656 | 3270 | 10 | 4506 | 3731 | 3432 | 8612 | 1667 | 1467 | 10 | 1327 | 29688 |
| 2011 | 1749 | 3231 | 15 | 4828 | 3870 | 3562 | 7435 | 1517 | 1093 | 11 | 1262 | 28573 |
| 2012 | 1687 | 3311 | 8 | 4243 | 3786 | 3630 | 8097 | 1817 | 1151 | 18 | 1474 | 29222 |
| 2013 | 1774 | 3025 | 7 | 4191 | 3663 | 3488 | 8340 | 2034 | 1034 | 15 | 1485 | 29056 |
| 2014 | 2244 | 3044 | 9 | 4518 | 3158 | 4210 | 10137 | 2461 | 1290 | 13 | 1575 | 32659 |
| 2015 | 2457 | 3121 | 15 | 4193 | 3202 | 4679 | 10544 | 3063 | 1371 | 14 | 1669 | 34328 |
| 2016 | 2686 | 2994 | 11 | 4599 | 3172 | 5708 | 12806 | 2634 | 1474 | 15 | 1699 | 37798 |
| 2017 | 1111 | 1304 | 11 | 2581 | 1605 | 2866 | 5962 | 986 | 755 | 5 | 671 | 17857 |
| Total | 33845 | 60862 | 220 | 70413 | 54142 | 52167 | 172700 | 25730 | 38418 | 254 | 21901 | 530652 |
# Cross-tabulate MONTH against crime TYPE, with row and column totals
crime %>%
  tabyl(MONTH, TYPE) %>%
  adorn_totals(c("row", "col")) %>%
  knitr::kable()
| MONTH | Break and Enter Commercial | Break and Enter Residential/Other | Homicide | Mischief | Offence Against a Person | Other Theft | Theft from Vehicle | Theft of Bicycle | Theft of Vehicle | Vehicle Collision or Pedestrian Struck (with Fatality) | Vehicle Collision or Pedestrian Struck (with Injury) | Total |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 3059 | 5783 | 17 | 5711 | 4794 | 4619 | 14943 | 993 | 3499 | 29 | 1771 | 45218 |
| 2 | 2712 | 4723 | 23 | 5378 | 4121 | 4615 | 13171 | 1034 | 3038 | 17 | 1600 | 40432 |
| 3 | 3124 | 5042 | 26 | 6278 | 4562 | 4966 | 14670 | 1370 | 3467 | 22 | 1787 | 45314 |
| 4 | 2838 | 4963 | 19 | 6245 | 4353 | 4604 | 14389 | 1805 | 3099 | 19 | 1704 | 44038 |
| 5 | 2889 | 5032 | 22 | 6431 | 4823 | 4695 | 15296 | 2609 | 3206 | 19 | 1861 | 46883 |
| 6 | 3010 | 4968 | 13 | 6541 | 4628 | 4561 | 14637 | 3159 | 3294 | 23 | 1877 | 46711 |
| 7 | 2693 | 4794 | 18 | 6076 | 4766 | 4238 | 14236 | 3857 | 3156 | 25 | 1820 | 45679 |
| 8 | 2682 | 4828 | 25 | 5810 | 4847 | 4113 | 14638 | 3732 | 3298 | 21 | 1952 | 45946 |
| 9 | 2683 | 5021 | 17 | 5469 | 4325 | 3999 | 14404 | 2793 | 3150 | 15 | 1874 | 43750 |
| 10 | 2811 | 5298 | 11 | 6009 | 4530 | 3933 | 14468 | 2229 | 3338 | 21 | 1918 | 44566 |
| 11 | 2655 | 5269 | 17 | 5310 | 4216 | 3939 | 14282 | 1287 | 3018 | 25 | 1928 | 41946 |
| 12 | 2689 | 5141 | 12 | 5155 | 4177 | 3885 | 13566 | 862 | 2855 | 18 | 1809 | 40169 |
| Total | 33845 | 60862 | 220 | 70413 | 54142 | 52167 | 172700 | 25730 | 38418 | 254 | 21901 | 530652 |
# Cross-tabulate DAY of month against crime TYPE, with row and column totals
crime %>%
  tabyl(DAY, TYPE) %>%
  adorn_totals(c("row", "col")) %>%
  knitr::kable()
| DAY | Break and Enter Commercial | Break and Enter Residential/Other | Homicide | Mischief | Offence Against a Person | Other Theft | Theft from Vehicle | Theft of Bicycle | Theft of Vehicle | Vehicle Collision or Pedestrian Struck (with Fatality) | Vehicle Collision or Pedestrian Struck (with Injury) | Total |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 1175 | 2117 | 12 | 2802 | 2581 | 1567 | 6128 | 1054 | 1330 | 9 | 737 | 19512 |
| 2 | 1149 | 1989 | 9 | 2248 | 1796 | 1668 | 5768 | 825 | 1279 | 8 | 694 | 17433 |
| 3 | 1120 | 2077 | 7 | 2259 | 1778 | 1758 | 5778 | 818 | 1243 | 9 | 670 | 17517 |
| 4 | 1082 | 2059 | 8 | 2221 | 1751 | 1740 | 5592 | 913 | 1308 | 7 | 708 | 17389 |
| 5 | 1144 | 2035 | 4 | 2324 | 1774 | 1835 | 5805 | 854 | 1299 | 6 | 718 | 17798 |
| 6 | 1143 | 1988 | 10 | 2222 | 1727 | 1905 | 5922 | 831 | 1234 | 7 | 736 | 17725 |
| 7 | 1170 | 2135 | 7 | 2310 | 1715 | 1799 | 6038 | 917 | 1321 | 11 | 763 | 18186 |
| 8 | 1153 | 2125 | 10 | 2341 | 1789 | 1883 | 5875 | 893 | 1286 | 12 | 682 | 18049 |
| 9 | 1126 | 2115 | 9 | 2263 | 1690 | 1809 | 5879 | 798 | 1324 | 9 | 694 | 17716 |
| 10 | 1140 | 2120 | 7 | 2280 | 1692 | 1834 | 5908 | 873 | 1329 | 10 | 679 | 17872 |
| 11 | 1234 | 2039 | 15 | 2316 | 1763 | 1936 | 6012 | 888 | 1290 | 10 | 668 | 18171 |
| 12 | 1209 | 2113 | 5 | 2328 | 1735 | 1951 | 6028 | 909 | 1353 | 2 | 755 | 18388 |
| 13 | 1128 | 2150 | 5 | 2288 | 1776 | 1806 | 6056 | 905 | 1312 | 7 | 696 | 18129 |
| 14 | 1144 | 2151 | 8 | 2309 | 1771 | 1888 | 5968 | 861 | 1309 | 5 | 751 | 18165 |
| 15 | 1306 | 2119 | 10 | 2783 | 1796 | 1868 | 6260 | 1008 | 1356 | 8 | 784 | 19298 |
| 16 | 1141 | 2102 | 8 | 2235 | 1826 | 1907 | 5947 | 864 | 1285 | 11 | 750 | 18076 |
| 17 | 1120 | 1995 | 7 | 2314 | 1726 | 1849 | 5976 | 876 | 1254 | 11 | 735 | 17863 |
| 18 | 1072 | 2017 | 4 | 2274 | 1736 | 1798 | 5681 | 889 | 1260 | 4 | 741 | 17476 |
| 19 | 1082 | 1992 | 5 | 2207 | 1699 | 1775 | 5674 | 835 | 1180 | 7 | 729 | 17185 |
| 20 | 1131 | 2032 | 4 | 2365 | 1685 | 1718 | 5802 | 854 | 1196 | 8 | 753 | 17548 |
| 21 | 1098 | 1958 | 1 | 2232 | 1727 | 1622 | 5340 | 910 | 1214 | 9 | 679 | 16790 |
| 22 | 1044 | 1891 | 1 | 2273 | 1781 | 1615 | 5376 | 820 | 1195 | 5 | 734 | 16735 |
| 23 | 1068 | 1905 | 6 | 2283 | 1651 | 1588 | 5211 | 737 | 1191 | 6 | 753 | 16399 |
| 24 | 1087 | 1809 | 8 | 2315 | 1746 | 1425 | 5210 | 744 | 1227 | 7 | 718 | 16296 |
| 25 | 1005 | 1744 | 5 | 2182 | 1819 | 1394 | 4933 | 738 | 1163 | 7 | 666 | 15656 |
| 26 | 949 | 1752 | 7 | 2270 | 1707 | 1496 | 5121 | 700 | 1158 | 10 | 703 | 15873 |
| 27 | 1022 | 1759 | 10 | 2167 | 1682 | 1459 | 5115 | 725 | 1190 | 8 | 735 | 15872 |
| 28 | 992 | 1835 | 8 | 2212 | 1808 | 1517 | 5124 | 796 | 1227 | 17 | 727 | 16263 |
| 29 | 946 | 1779 | 9 | 2156 | 1689 | 1407 | 4860 | 775 | 1181 | 11 | 656 | 15469 |
| 30 | 972 | 1780 | 8 | 2075 | 1644 | 1476 | 5008 | 691 | 1217 | 6 | 672 | 15549 |
| 31 | 693 | 1180 | 3 | 1559 | 1082 | 874 | 3305 | 429 | 707 | 7 | 415 | 10254 |
| Total | 33845 | 60862 | 220 | 70413 | 54142 | 52167 | 172700 | 25730 | 38418 | 254 | 21901 | 530652 |
# Cross-tabulate HOUR against crime TYPE, with row and column totals
# (records without an HOUR show up in their own NA row)
crime %>%
  tabyl(HOUR, TYPE) %>%
  adorn_totals(c("row", "col")) %>%
  knitr::kable()
| HOUR | Break and Enter Commercial | Break and Enter Residential/Other | Homicide | Mischief | Offence Against a Person | Other Theft | Theft from Vehicle | Theft of Bicycle | Theft of Vehicle | Vehicle Collision or Pedestrian Struck (with Fatality) | Vehicle Collision or Pedestrian Struck (with Injury) | Total |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2326 | 3049 | 0 | 5616 | 0 | 369 | 11328 | 1687 | 2316 | 9 | 477 | 27177 |
| 1 | 1635 | 1345 | 0 | 3390 | 0 | 285 | 4012 | 500 | 1093 | 12 | 449 | 12721 |
| 2 | 1808 | 1188 | 0 | 2819 | 0 | 223 | 2786 | 314 | 813 | 6 | 380 | 10337 |
| 3 | 2267 | 1099 | 0 | 2189 | 0 | 192 | 1976 | 205 | 499 | 11 | 319 | 8757 |
| 4 | 2589 | 1026 | 0 | 1636 | 0 | 156 | 1674 | 189 | 408 | 10 | 224 | 7912 |
| 5 | 2383 | 1037 | 0 | 1336 | 0 | 179 | 1704 | 204 | 348 | 8 | 263 | 7462 |
| 6 | 1489 | 1397 | 0 | 1283 | 0 | 225 | 2336 | 320 | 538 | 3 | 510 | 8101 |
| 7 | 990 | 2220 | 0 | 1531 | 0 | 368 | 3568 | 571 | 729 | 6 | 819 | 10802 |
| 8 | 822 | 3730 | 0 | 2157 | 0 | 623 | 5296 | 1095 | 1056 | 6 | 1149 | 15934 |
| 9 | 771 | 3247 | 0 | 2205 | 0 | 1355 | 5648 | 1121 | 1073 | 10 | 1046 | 16476 |
| 10 | 690 | 3049 | 0 | 2029 | 0 | 2420 | 4867 | 916 | 959 | 11 | 1050 | 15991 |
| 11 | 545 | 2836 | 0 | 1965 | 0 | 3136 | 4755 | 866 | 902 | 9 | 1041 | 16055 |
| 12 | 689 | 3872 | 0 | 2848 | 0 | 3646 | 8224 | 1651 | 1553 | 17 | 1113 | 23613 |
| 13 | 476 | 2757 | 0 | 2048 | 0 | 4253 | 5765 | 1154 | 991 | 10 | 1069 | 18523 |
| 14 | 554 | 2946 | 0 | 2249 | 0 | 4824 | 6418 | 1297 | 1258 | 17 | 1231 | 20794 |
| 15 | 783 | 3037 | 0 | 2689 | 0 | 5229 | 7596 | 1529 | 1464 | 19 | 1550 | 23896 |
| 16 | 1142 | 2810 | 0 | 3092 | 0 | 5148 | 8984 | 1581 | 1775 | 8 | 1545 | 26085 |
| 17 | 1955 | 3209 | 0 | 3814 | 0 | 4947 | 11586 | 1822 | 2456 | 16 | 1816 | 31621 |
| 18 | 2344 | 3640 | 0 | 4455 | 0 | 4231 | 14942 | 2060 | 3031 | 13 | 1479 | 36195 |
| 19 | 1420 | 2991 | 0 | 3902 | 0 | 3581 | 12161 | 1634 | 2802 | 14 | 1085 | 29590 |
| 20 | 1327 | 2657 | 0 | 3974 | 0 | 2857 | 11645 | 1464 | 2879 | 9 | 895 | 27707 |
| 21 | 1427 | 2587 | 0 | 4247 | 0 | 1974 | 11721 | 1294 | 3202 | 6 | 903 | 27361 |
| 22 | 1640 | 2650 | 0 | 4626 | 0 | 1282 | 13106 | 1285 | 3361 | 14 | 827 | 28791 |
| 23 | 1773 | 2483 | 0 | 4313 | 0 | 664 | 10602 | 971 | 2912 | 10 | 661 | 24389 |
| NA | 0 | 0 | 220 | 0 | 54142 | 0 | 0 | 0 | 0 | 0 | 0 | 54362 |
| Total | 33845 | 60862 | 220 | 70413 | 54142 | 52167 | 172700 | 25730 | 38418 | 254 | 21901 | 530652 |
# Cross-tabulate MINUTE against crime TYPE, with row and column totals
# (records without a MINUTE show up in their own NA row)
crime %>%
  tabyl(MINUTE, TYPE) %>%
  adorn_totals(c("row", "col")) %>%
  knitr::kable()
| MINUTE | Break and Enter Commercial | Break and Enter Residential/Other | Homicide | Mischief | Offence Against a Person | Other Theft | Theft from Vehicle | Theft of Bicycle | Theft of Vehicle | Vehicle Collision or Pedestrian Struck (with Fatality) | Vehicle Collision or Pedestrian Struck (with Injury) | Total |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 11602 | 27328 | 0 | 25813 | 0 | 3259 | 101683 | 14440 | 22242 | 7 | 965 | 207339 |
| 1 | 468 | 704 | 0 | 746 | 0 | 686 | 1093 | 137 | 246 | 2 | 309 | 4391 |
| 2 | 223 | 244 | 0 | 535 | 0 | 736 | 287 | 66 | 54 | 3 | 309 | 2457 |
| 3 | 215 | 275 | 0 | 470 | 0 | 678 | 334 | 61 | 59 | 3 | 304 | 2399 |
| 4 | 224 | 237 | 0 | 469 | 0 | 676 | 305 | 61 | 57 | 2 | 325 | 2356 |
| 5 | 460 | 514 | 0 | 812 | 0 | 1062 | 769 | 158 | 205 | 7 | 444 | 4431 |
| 6 | 222 | 257 | 0 | 489 | 0 | 678 | 313 | 67 | 60 | 2 | 304 | 2392 |
| 7 | 232 | 229 | 0 | 518 | 0 | 685 | 336 | 66 | 52 | 2 | 332 | 2452 |
| 8 | 250 | 272 | 0 | 529 | 0 | 754 | 348 | 60 | 58 | 3 | 334 | 2608 |
| 9 | 241 | 227 | 0 | 525 | 0 | 704 | 321 | 72 | 56 | 4 | 282 | 2432 |
| 10 | 593 | 871 | 0 | 1028 | 0 | 1221 | 1468 | 304 | 327 | 10 | 517 | 6339 |
| 11 | 227 | 241 | 0 | 517 | 0 | 686 | 297 | 51 | 56 | 1 | 334 | 2410 |
| 12 | 262 | 249 | 0 | 526 | 0 | 674 | 348 | 75 | 43 | 5 | 280 | 2462 |
| 13 | 242 | 270 | 0 | 465 | 0 | 681 | 292 | 65 | 54 | 3 | 339 | 2411 |
| 14 | 247 | 240 | 0 | 520 | 0 | 711 | 311 | 56 | 52 | 1 | 312 | 2450 |
| 15 | 1031 | 1920 | 0 | 1877 | 0 | 1544 | 4717 | 807 | 1022 | 10 | 510 | 13438 |
| 16 | 251 | 252 | 0 | 485 | 0 | 695 | 286 | 52 | 64 | 0 | 307 | 2392 |
| 17 | 238 | 257 | 0 | 490 | 0 | 663 | 350 | 70 | 59 | 3 | 306 | 2436 |
| 18 | 238 | 269 | 0 | 538 | 0 | 744 | 321 | 61 | 36 | 4 | 320 | 2531 |
| 19 | 207 | 254 | 0 | 490 | 0 | 705 | 288 | 66 | 49 | 3 | 307 | 2369 |
| 20 | 734 | 1036 | 0 | 1259 | 0 | 1371 | 1952 | 376 | 430 | 9 | 569 | 7736 |
| 21 | 215 | 256 | 0 | 484 | 0 | 698 | 328 | 54 | 52 | 3 | 284 | 2374 |
| 22 | 240 | 249 | 0 | 483 | 0 | 685 | 310 | 60 | 59 | 2 | 300 | 2388 |
| 23 | 235 | 277 | 0 | 511 | 0 | 675 | 304 | 68 | 68 | 1 | 288 | 2427 |
| 24 | 237 | 272 | 0 | 533 | 0 | 681 | 370 | 71 | 60 | 3 | 316 | 2543 |
| 25 | 434 | 529 | 0 | 845 | 0 | 1093 | 765 | 169 | 156 | 12 | 444 | 4447 |
| 26 | 220 | 239 | 0 | 503 | 0 | 714 | 314 | 62 | 62 | 5 | 295 | 2414 |
| 27 | 228 | 266 | 0 | 507 | 0 | 702 | 328 | 52 | 74 | 6 | 322 | 2485 |
| 28 | 257 | 249 | 0 | 518 | 0 | 714 | 313 | 61 | 56 | 4 | 323 | 2495 |
| 29 | 221 | 255 | 0 | 474 | 0 | 668 | 320 | 42 | 60 | 5 | 312 | 2357 |
| 30 | 4258 | 10926 | 0 | 8387 | 0 | 2612 | 34494 | 4536 | 8584 | 14 | 835 | 74646 |
| 31 | 231 | 262 | 0 | 492 | 0 | 680 | 322 | 48 | 60 | 4 | 303 | 2402 |
| 32 | 207 | 250 | 0 | 477 | 0 | 689 | 309 | 49 | 57 | 4 | 307 | 2349 |
| 33 | 207 | 258 | 0 | 501 | 0 | 744 | 345 | 74 | 61 | 0 | 327 | 2517 |
| 34 | 230 | 264 | 0 | 522 | 0 | 705 | 326 | 59 | 71 | 7 | 278 | 2462 |
| 35 | 418 | 450 | 0 | 800 | 0 | 1061 | 692 | 153 | 166 | 8 | 471 | 4219 |
| 36 | 240 | 259 | 0 | 532 | 0 | 728 | 295 | 50 | 63 | 2 | 348 | 2517 |
| 37 | 216 | 242 | 0 | 525 | 0 | 718 | 321 | 56 | 68 | 4 | 322 | 2472 |
| 38 | 249 | 238 | 0 | 515 | 0 | 703 | 315 | 60 | 64 | 3 | 308 | 2455 |
| 39 | 236 | 259 | 0 | 516 | 0 | 709 | 281 | 51 | 67 | 4 | 318 | 2441 |
| 40 | 694 | 971 | 0 | 1175 | 0 | 1373 | 1611 | 306 | 364 | 4 | 531 | 7029 |
| 41 | 234 | 264 | 0 | 498 | 0 | 666 | 303 | 65 | 56 | 6 | 303 | 2395 |
| 42 | 233 | 241 | 0 | 500 | 0 | 762 | 334 | 47 | 62 | 3 | 330 | 2512 |
| 43 | 257 | 255 | 0 | 451 | 0 | 702 | 312 | 51 | 52 | 3 | 345 | 2428 |
| 44 | 237 | 258 | 0 | 501 | 0 | 665 | 306 | 59 | 56 | 4 | 321 | 2407 |
| 45 | 1020 | 2210 | 0 | 1991 | 0 | 1761 | 5586 | 956 | 1244 | 8 | 551 | 15327 |
| 46 | 218 | 224 | 0 | 487 | 0 | 651 | 331 | 53 | 39 | 1 | 304 | 2308 |
| 47 | 249 | 269 | 0 | 519 | 0 | 708 | 343 | 57 | 66 | 2 | 328 | 2541 |
| 48 | 240 | 255 | 0 | 508 | 0 | 736 | 352 | 68 | 68 | 8 | 302 | 2537 |
| 49 | 205 | 249 | 0 | 503 | 0 | 633 | 309 | 48 | 59 | 3 | 325 | 2334 |
| 50 | 736 | 963 | 0 | 1158 | 0 | 1398 | 1855 | 342 | 415 | 8 | 575 | 7450 |
| 51 | 188 | 225 | 0 | 474 | 0 | 722 | 293 | 67 | 51 | 5 | 309 | 2334 |
| 52 | 232 | 257 | 0 | 491 | 0 | 697 | 312 | 76 | 58 | 3 | 306 | 2432 |
| 53 | 234 | 239 | 0 | 524 | 0 | 725 | 333 | 70 | 56 | 5 | 338 | 2524 |
| 54 | 248 | 241 | 0 | 526 | 0 | 666 | 315 | 74 | 56 | 3 | 304 | 2433 |
| 55 | 424 | 553 | 0 | 786 | 0 | 1077 | 843 | 184 | 210 | 7 | 462 | 4546 |
| 56 | 231 | 248 | 0 | 546 | 0 | 685 | 299 | 51 | 54 | 3 | 320 | 2437 |
| 57 | 257 | 249 | 0 | 533 | 0 | 655 | 298 | 60 | 56 | 3 | 309 | 2420 |
| 58 | 247 | 227 | 0 | 476 | 0 | 675 | 321 | 64 | 55 | 0 | 304 | 2369 |
| 59 | 275 | 318 | 0 | 540 | 0 | 618 | 573 | 86 | 152 | 0 | 324 | 2886 |
| NA | 0 | 0 | 220 | 0 | 54142 | 0 | 0 | 0 | 0 | 0 | 0 | 54362 |
| Total | 33845 | 60862 | 220 | 70413 | 54142 | 52167 | 172700 | 25730 | 38418 | 254 | 21901 | 530652 |
Comments
TYPE by Year - Overall, the number of crimes declines steadily until 2010, stays low between 2010 and 2013, and increases again from 2014. Not every type follows this pattern (e.g. Other Theft and Theft of Bicycle increase over the period).
# Cross-tabulate NEIGHBOURHOOD against crime TYPE, with row and column totals
crime %>%
  tabyl(NEIGHBOURHOOD, TYPE) %>%
  adorn_totals(c("row", "col")) %>%
  knitr::kable()
| NEIGHBOURHOOD | Break and Enter Commercial | Break and Enter Residential/Other | Homicide | Mischief | Offence Against a Person | Other Theft | Theft from Vehicle | Theft of Bicycle | Theft of Vehicle | Vehicle Collision or Pedestrian Struck (with Fatality) | Vehicle Collision or Pedestrian Struck (with Injury) | Total |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| | 3 | 3 | 220 | 253 | 54142 | 4 | 1809 | 110 | 66 | 0 | 14 | 56624 |
| Arbutus Ridge | 325 | 1672 | 0 | 934 | 0 | 337 | 1852 | 160 | 498 | 3 | 285 | 6066 |
| Central Business District | 9371 | 3505 | 0 | 16672 | 0 | 19244 | 48003 | 6907 | 4016 | 41 | 3188 | 110947 |
| Dunbar-Southlands | 294 | 1847 | 0 | 1324 | 0 | 241 | 2899 | 240 | 629 | 3 | 269 | 7746 |
| Fairview | 3303 | 3834 | 0 | 3196 | 0 | 3269 | 11934 | 3394 | 2037 | 14 | 1180 | 32161 |
| Grandview-Woodland | 2082 | 4515 | 0 | 4970 | 0 | 2508 | 7342 | 1403 | 3111 | 9 | 1240 | 27180 |
| Hastings-Sunrise | 929 | 3199 | 0 | 2904 | 0 | 1379 | 5654 | 321 | 2452 | 18 | 1270 | 18126 |
| Kensington-Cedar Cottage | 1277 | 4136 | 0 | 3760 | 0 | 2961 | 7474 | 859 | 2919 | 11 | 1544 | 24941 |
| Kerrisdale | 326 | 1826 | 0 | 1049 | 0 | 265 | 2805 | 179 | 547 | 7 | 443 | 7447 |
| Killarney | 302 | 2130 | 0 | 1761 | 0 | 245 | 3990 | 163 | 1302 | 12 | 570 | 10475 |
| Kitsilano | 2092 | 4390 | 0 | 3692 | 0 | 1730 | 8912 | 2464 | 2366 | 13 | 1040 | 26699 |
| Marpole | 1098 | 2527 | 0 | 1905 | 0 | 612 | 4151 | 232 | 1617 | 10 | 931 | 13083 |
| Mount Pleasant | 2769 | 3278 | 0 | 4070 | 0 | 3698 | 9679 | 2746 | 2654 | 18 | 1624 | 30536 |
| Musqueam | 17 | 86 | 0 | 104 | 0 | 1 | 217 | 7 | 40 | 1 | 59 | 532 |
| Oakridge | 332 | 2089 | 0 | 889 | 0 | 1176 | 2290 | 172 | 669 | 6 | 414 | 8037 |
| Renfrew-Collingwood | 1197 | 4296 | 0 | 3886 | 0 | 4119 | 8420 | 419 | 3011 | 12 | 1401 | 26761 |
| Riley Park | 848 | 2706 | 0 | 1795 | 0 | 410 | 4269 | 621 | 1197 | 4 | 671 | 12521 |
| Shaughnessy | 129 | 1774 | 0 | 633 | 0 | 25 | 1769 | 139 | 371 | 7 | 579 | 5426 |
| South Cambie | 314 | 1109 | 0 | 606 | 0 | 759 | 1529 | 221 | 435 | 2 | 237 | 5212 |
| Stanley Park | 72 | 65 | 0 | 246 | 0 | 13 | 2868 | 214 | 74 | 6 | 217 | 3775 |
| Strathcona | 2168 | 2019 | 0 | 4556 | 0 | 994 | 7343 | 1015 | 1650 | 20 | 1154 | 20919 |
| Sunset | 1105 | 2578 | 0 | 3243 | 0 | 1401 | 5226 | 255 | 2275 | 17 | 1296 | 17396 |
| Victoria-Fraserview | 386 | 2499 | 0 | 1761 | 0 | 483 | 3390 | 132 | 1372 | 10 | 786 | 10819 |
| West End | 2775 | 3480 | 0 | 5325 | 0 | 6033 | 16904 | 2985 | 2660 | 6 | 1184 | 41352 |
| West Point Grey | 331 | 1299 | 0 | 879 | 0 | 260 | 1971 | 372 | 450 | 4 | 305 | 5871 |
| Total | 33845 | 60862 | 220 | 70413 | 54142 | 52167 | 172700 | 25730 | 38418 | 254 | 21901 | 530652 |
Comments
### TYPE ###
# Yearly incident counts, one coloured line per crime type
crime %>%
  ggplot(aes(x = YEAR, color = TYPE)) +
  geom_line(stat = "count") +
  # geom_point(stat = "count") +
  theme_classic() +
  labs(
    title = "Number by Crime Types by Year",
    x = "Year",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  )
# Number by Crime Types by Month
ggplot(crime, aes(x = MONTH, color = TYPE)) +
geom_line(stat = 'count') +
#geom_point(stat = 'count') +
theme_classic() + labs(title = "Numbers by Crime Types by Month", x = "Month", y = "Number of Crimes" ) +
theme(axis.title = element_text(colour = "dark green"), title = element_text(colour = "red"))
# Number by Crime Types by Day
ggplot(crime, aes(x = DAY, color = TYPE)) +
geom_line(stat = 'count') +
#geom_point(stat = 'count') +
theme_classic() + labs(title = "Numbers by Crime Types by Day", x = "Day", y = "Number of Crimes" ) +
theme(axis.title = element_text(colour = "dark green"), title = element_text(colour = "red"))
# Number by Crime Types by Hour
ggplot(crime, aes(x = HOUR, color = TYPE)) +
geom_line(stat = 'count') +
#geom_point(stat = 'count') +
theme_classic() + labs(title = "Numbers by Crime Types by Hour", x = "Hour", y = "Number of Crimes" ) +
theme(axis.title = element_text(colour = "dark green"), title = element_text(colour = "blue"))
## Warning: Removed 54362 rows containing non-finite values (stat_count).
# Incidents per MINUTE, one line per crime TYPE.
# The title colour was written as "red " with a stray trailing space; it is
# spelled "red" here so the colour name is well-formed.
ggplot(crime, aes(x = MINUTE, colour = TYPE)) +
  geom_line(stat = "count") +
  theme_classic() +
  labs(
    title = "Numbers by Crime Types by Minute",
    x = "Minute",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "red")
  )
## Warning: Removed 54362 rows containing non-finite values (stat_count).
- TYPE by YEAR - Other Theft continues to increase - Mischief steadily decreases, with some ups and downs through the years - Theft of Vehicle, Break and Enter Residential/Other drop sizably through the years - While Theft from Vehicle accounts for the majority of crime incidents, its number of cases sharply declines from 2004 to 2011, then starts to bounce back up in recent years.
Group by patterns over years
Group 1(increase-decrease-increase): ‘Break and Enter Commercial’, ‘Theft from Vehicle’, ‘Vehicle Collision or Pedestrian Struck (with Injury)’
Group 2(constantly decrease): ‘Break and Enter Residential/Other’, ‘Mischief’, ‘Theft of Vehicle’, ‘Offence Against a Person’
Group 3(constantly increase): ‘Other Theft’, ‘Theft of Bicycle’
Group 4 (number of cases too small to get a conclusive pattern): ‘Homicide’, ‘Vehicle Collision or Pedestrian Struck (with Fatality)’
# Overall picture: incidents per YEAR, one line per crime TYPE
ggplot(crime, aes(x = YEAR, colour = TYPE)) +
  geom_line(stat = "count") +
  theme_classic() +
  labs(
    title = "Number by Crime Types by Year",
    x = "Year",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  )
#-------------------------------------#
# One subset of `crime` per crime TYPE. These objects are also used by the
# per-month, per-day, per-hour and per-minute panel grids.
comm <- crime %>% filter(TYPE %in% "Break and Enter Commercial")
resit <- crime %>% filter(TYPE %in% "Break and Enter Residential/Other")
homi <- crime %>% filter(TYPE %in% "Homicide")
mis <- crime %>% filter(TYPE %in% "Mischief")
off <- crime %>% filter(TYPE %in% "Offence Against a Person")
ot <- crime %>% filter(TYPE %in% "Other Theft")
tfv <- crime %>% filter(TYPE %in% "Theft from Vehicle")
tb <- crime %>% filter(TYPE %in% "Theft of Bicycle")
tv <- crime %>% filter(TYPE %in% "Theft of Vehicle")
vcf <- crime %>% filter(TYPE %in% "Vehicle Collision or Pedestrian Struck (with Fatality)")
vci <- crime %>% filter(TYPE %in% "Vehicle Collision or Pedestrian Struck (with Injury)")

# One small panel per crime type, each on its own y scale. The panels are
# described as (data, colour, title) and built by a single function instead of
# eleven hand-copied ggplot calls.
grid.arrange(
  grobs = lapply(
    list(
      list(data = comm, colour = "blue", title = "Break and Enter Commercial"),
      list(data = resit, colour = "red", title = "Break and Enter Residential/Other"),
      list(data = homi, colour = "green", title = "Homicide"),
      list(data = mis, colour = "purple", title = "Mischief"),
      list(data = off, colour = "dark green", title = "Offence Against a Person"),
      list(data = ot, colour = "pink", title = "Other Theft"),
      list(data = tfv, colour = "dark green", title = "Theft from Vehicle"),
      list(data = tb, colour = "orange", title = "Theft of Bicycle"),
      list(data = tv, colour = "purple", title = "Theft of Vehicle"),
      list(data = vcf, colour = "light blue", title = "Vehicle Collision or Pedestrian Struck (with Fatality)"),
      list(data = vci, colour = "purple", title = "Vehicle Collision or Pedestrian Struck (with Injury)")
    ),
    function(panel) {
      # Line plus points of the incident count per YEAR; axis titles are
      # blanked with a single space to save room in the grid.
      ggplot(panel$data, aes(x = YEAR)) +
        geom_line(stat = "count", colour = panel$colour) +
        geom_point(stat = "count", colour = panel$colour) +
        theme_classic() +
        labs(title = panel$title, x = " ", y = " ")
    }
  )
)
Group by patterns over months
Group 1 ( random pattern = up&down all year round): ‘Break and Enter Commercial’, ‘Vehicle Collision or Pedestrian Struck (with Injury)’, ‘Break and Enter Residential/Other’
Group 2 ( increase toward summer & decrease toward winter): ‘Offence Against a Person’, ‘Theft from Vehicle’, ‘Theft of Bicycle’, ‘Theft of Vehicle’,
Group 3 (decrease toward end of year): “Other Theft”, ‘Mischief’
Group 4 (too small sample size, random pattern) : ‘Homicide’, ‘Vehicle Collision or Pedestrian Struck (with Fatality)’
Most types of crime decline in December.
# Overall picture: incidents per MONTH, one line per crime TYPE.
# The x axis shows MONTH, so it is labelled "Month" (it was labelled "Year").
ggplot(crime, aes(x = MONTH, colour = TYPE)) +
  geom_line(stat = "count") +
  theme_classic() +
  labs(
    title = "Number by Crime Types by Month",
    x = "Month",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  )
#-------------------------------------#
# One small panel per crime type, each on its own y scale. The panels are
# described as (data, colour, title) and built by a single function instead of
# eleven hand-copied ggplot calls.
grid.arrange(
  grobs = lapply(
    list(
      list(data = comm, colour = "blue", title = "Break and Enter Commercial"),
      list(data = resit, colour = "red", title = "Break and Enter Residential/Other"),
      list(data = homi, colour = "green", title = "Homicide"),
      list(data = mis, colour = "purple", title = "Mischief"),
      list(data = off, colour = "dark green", title = "Offence Against a Person"),
      list(data = ot, colour = "pink", title = "Other Theft"),
      list(data = tfv, colour = "dark green", title = "Theft from Vehicle"),
      list(data = tb, colour = "orange", title = "Theft of Bicycle"),
      list(data = tv, colour = "purple", title = "Theft of Vehicle"),
      list(data = vcf, colour = "light blue", title = "Vehicle Collision or Pedestrian Struck (with Fatality)"),
      list(data = vci, colour = "purple", title = "Vehicle Collision or Pedestrian Struck (with Injury)")
    ),
    function(panel) {
      # Line plus points of the incident count per MONTH; axis titles are
      # blanked with a single space to save room in the grid.
      ggplot(panel$data, aes(x = MONTH)) +
        geom_line(stat = "count", colour = panel$colour) +
        geom_point(stat = "count", colour = panel$colour) +
        theme_classic() +
        labs(title = panel$title, x = " ", y = " ")
    }
  )
)
Group by patterns over days
Group 1 ( high on 1st & 15th - sign of organized crime): ‘Break and Enter Commercial’, ‘Mischief’, ‘Theft of Bicycle’
Group 2 ( Gradually decrease toward end of month): ‘Break and Enter Residential/Other’, ‘Other Theft’, ‘Theft from Vehicle’, ‘Theft of Vehicle’
Group 3 ( flat with minor up&down ): ‘Offence Against a Person’, ‘Vehicle Collision or Pedestrian Struck (with Injury)’
Group 4 ( too small number, no pattern): ‘Homicide’, ‘Vehicle Collision or Pedestrian Struck (with Fatality)’
# Overall picture: incidents per DAY, one line per crime TYPE
ggplot(crime, aes(x = DAY, colour = TYPE)) +
  geom_line(stat = "count") +
  theme_classic() +
  labs(
    title = "Numbers by Crime Types by Day",
    x = "Day",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "red")
  )

# One small panel per crime type, each on its own y scale. The panels are
# described as (data, colour, title) and built by a single function instead of
# eleven hand-copied ggplot calls.
grid.arrange(
  grobs = lapply(
    list(
      list(data = comm, colour = "blue", title = "Break and Enter Commercial"),
      list(data = resit, colour = "red", title = "Break and Enter Residential/Other"),
      list(data = homi, colour = "green", title = "Homicide"),
      list(data = mis, colour = "purple", title = "Mischief"),
      list(data = off, colour = "dark green", title = "Offence Against a Person"),
      list(data = ot, colour = "pink", title = "Other Theft"),
      list(data = tfv, colour = "dark green", title = "Theft from Vehicle"),
      list(data = tb, colour = "orange", title = "Theft of Bicycle"),
      list(data = tv, colour = "purple", title = "Theft of Vehicle"),
      list(data = vcf, colour = "light blue", title = "Vehicle Collision or Pedestrian Struck (with Fatality)"),
      list(data = vci, colour = "purple", title = "Vehicle Collision or Pedestrian Struck (with Injury)")
    ),
    function(panel) {
      # Line plus points of the incident count per DAY; axis titles are
      # blanked with a single space to save room in the grid.
      ggplot(panel$data, aes(x = DAY)) +
        geom_line(stat = "count", colour = panel$colour) +
        geom_point(stat = "count", colour = panel$colour) +
        theme_classic() +
        labs(title = panel$title, x = " ", y = " ")
    }
  )
)
Group by patterns over hours
Group 1 (High early morning & evening): ‘Break and Enter Commercial’
Group 2 ( Low early morning, increase toward 18 hour & high until 24 hour): ‘Break and Enter Residential/Other’, ‘Mischief’, ‘Theft from Vehicle’, ‘Theft of Bicycle’, ‘Theft of Vehicle’
Group 3 ( Peak at 18 hour): ‘Other Theft’, ‘Vehicle Collision or Pedestrian Struck (with Injury)’
Group 4 (info N/A & too small number - no pattern): ‘Homicide’, ‘Vehicle Collision or Pedestrian Struck (with Fatality)’, ‘Offence Against a Person’
# Incidents per HOUR, one line per crime TYPE
crime %>%
  ggplot(aes(x = HOUR, colour = TYPE)) +
  geom_line(stat = "count") +
  theme_classic() +
  labs(
    title = "Numbers by Crime Types by Hour",
    x = "Hour",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  )
## Warning: Removed 54362 rows containing non-finite values (stat_count).
# One small panel per crime type showing incidents per HOUR, each on its own
# y scale. The panels are described as (data, colour, title) and built by a
# single function instead of eleven hand-copied ggplot calls.
grid.arrange(
  grobs = lapply(
    list(
      list(data = comm, colour = "blue", title = "Break and Enter Commercial"),
      list(data = resit, colour = "red", title = "Break and Enter Residential/Other"),
      list(data = homi, colour = "green", title = "Homicide"),
      list(data = mis, colour = "purple", title = "Mischief"),
      list(data = off, colour = "dark green", title = "Offence Against a Person"),
      list(data = ot, colour = "pink", title = "Other Theft"),
      list(data = tfv, colour = "dark green", title = "Theft from Vehicle"),
      list(data = tb, colour = "orange", title = "Theft of Bicycle"),
      list(data = tv, colour = "purple", title = "Theft of Vehicle"),
      list(data = vcf, colour = "light blue", title = "Vehicle Collision or Pedestrian Struck (with Fatality)"),
      list(data = vci, colour = "purple", title = "Vehicle Collision or Pedestrian Struck (with Injury)")
    ),
    function(panel) {
      # Line plus points of the incident count per HOUR; axis titles are
      # blanked with a single space to save room in the grid.
      ggplot(panel$data, aes(x = HOUR)) +
        geom_line(stat = "count", colour = panel$colour) +
        geom_point(stat = "count", colour = panel$colour) +
        theme_classic() +
        labs(title = panel$title, x = " ", y = " ")
    }
  )
)
## Warning: Removed 220 rows containing non-finite values (stat_count).
## Warning: Removed 220 rows containing non-finite values (stat_count).
## Warning: Removed 54142 rows containing non-finite values (stat_count).
## Warning: Removed 54142 rows containing non-finite values (stat_count).
Group by patterns over minutes
Group 1(Most cases occurred at 0 & 30 minute - organized crime): ‘Break and Enter Commercial’, ‘Break and Enter Residential/Other’, ‘Mischief’, ‘Theft from Vehicle’, ‘Theft of Bicycle’, ‘Theft of Vehicle’
Group 2( Up&down with peaks at 0 and 30 min - mix of disorganized/organized crime): ‘Other Theft’, ‘Vehicle Collision or Pedestrian Struck (with Injury)’
Group 3(No info or too small number of incidents): ‘Homicide’, ‘Vehicle Collision or Pedestrian Struck (with Fatality)’ , ‘Offence Against a Person’
Other Theft and Vehicle Collision or Pedestrian Struck(with Injury) - mix of organized and disorganized
Vehicle Collision or Pedestrian Struck(with Fatality) - randomly distributed, it means most cases are not planned - disorganized
All others - share a specific pattern of having four quarters with majority occurred at 0 and 30 minute - organized
# Incidents per MINUTE, one line per crime TYPE.
# The title colour was written as "red " with a stray trailing space; it is
# spelled "red" here so the colour name is well-formed.
ggplot(crime, aes(x = MINUTE, colour = TYPE)) +
  geom_line(stat = "count") +
  theme_classic() +
  labs(
    title = "Numbers by Crime Types by Minute",
    x = "Minute",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "red")
  )
## Warning: Removed 54362 rows containing non-finite values (stat_count).
# One small panel per crime type showing incidents per MINUTE, each on its own
# y scale. The panels are described as (data, colour, title) and built by a
# single function instead of eleven hand-copied ggplot calls.
grid.arrange(
  grobs = lapply(
    list(
      list(data = comm, colour = "blue", title = "Break and Enter Commercial"),
      list(data = resit, colour = "red", title = "Break and Enter Residential/Other"),
      list(data = homi, colour = "green", title = "Homicide"),
      list(data = mis, colour = "purple", title = "Mischief"),
      list(data = off, colour = "dark green", title = "Offence Against a Person"),
      list(data = ot, colour = "pink", title = "Other Theft"),
      list(data = tfv, colour = "dark green", title = "Theft from Vehicle"),
      list(data = tb, colour = "orange", title = "Theft of Bicycle"),
      list(data = tv, colour = "purple", title = "Theft of Vehicle"),
      list(data = vcf, colour = "light blue", title = "Vehicle Collision or Pedestrian Struck (with Fatality)"),
      list(data = vci, colour = "purple", title = "Vehicle Collision or Pedestrian Struck (with Injury)")
    ),
    function(panel) {
      # Line plus points of the incident count per MINUTE; axis titles are
      # blanked with a single space to save room in the grid.
      ggplot(panel$data, aes(x = MINUTE)) +
        geom_line(stat = "count", colour = panel$colour) +
        geom_point(stat = "count", colour = panel$colour) +
        theme_classic() +
        labs(title = panel$title, x = " ", y = " ")
    }
  )
)
## Warning: Removed 220 rows containing non-finite values (stat_count).
## Warning: Removed 220 rows containing non-finite values (stat_count).
## Warning: Removed 54142 rows containing non-finite values (stat_count).
## Warning: Removed 54142 rows containing non-finite values (stat_count).
# Collapse the detailed crime TYPE into four broader classes, stored in CLASS:
# "B&E", "Theft", "Vehicle Crime", and "Other" for every remaining type.
# Membership is tested with %in%; an earlier attempt compared TYPE to a vector
# with `==`, which recycles the vector element-wise instead of testing
# membership.
crime$CLASS <- case_when(
  crime$TYPE %in% c(
    "Break and Enter Commercial",
    "Break and Enter Residential/Other"
  ) ~ "B&E",
  crime$TYPE %in% c(
    "Theft from Vehicle",
    "Theft of Bicycle",
    "Theft of Vehicle",
    "Other Theft"
  ) ~ "Theft",
  crime$TYPE %in% c(
    "Vehicle Collision or Pedestrian Struck (with Injury)",
    "Vehicle Collision or Pedestrian Struck (with Fatality)"
  ) ~ "Vehicle Crime",
  TRUE ~ "Other"
)
# Incident counts per time field, one line per crime CLASS.

# YEAR
ggplot(crime, aes(x = YEAR, colour = CLASS)) +
  geom_line(stat = "count") +
  theme_classic() +
  labs(
    title = "Numbers by Crime Class by Year",
    x = "Year",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  )

# MONTH
ggplot(crime, aes(x = MONTH, colour = CLASS)) +
  geom_line(stat = "count") +
  theme_classic() +
  labs(
    title = "Numbers by Crime Class by Month",
    x = "Month",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  )

# DAY
ggplot(crime, aes(x = DAY, colour = CLASS)) +
  geom_line(stat = "count") +
  theme_classic() +
  labs(
    title = "Numbers by Crime Class by Day",
    x = "Day",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  )

# HOUR (N/A for 'Offence Against a Person' and 'Homicide')
ggplot(crime, aes(x = HOUR, colour = CLASS)) +
  geom_line(stat = "count") +
  theme_classic() +
  labs(
    title = "Numbers by Crime Class by Hour",
    x = "Hour",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  )
## Warning: Removed 54362 rows containing non-finite values (stat_count).
# MINUTE (N/A for 'Offence Against a Person' and 'Homicide')
# Incident counts per MINUTE, one line per crime CLASS.
ggplot(crime, aes(x = MINUTE, colour = CLASS)) +
  geom_line(stat = "count") +
  theme_classic() +
  labs(
    title = "Numbers by Crime Class by Minute",
    x = "Minute",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  )
## Warning: Removed 54362 rows containing non-finite values (stat_count).
* Year - The Theft crime rate decreases until 2011, then sharply increases again - Break and Enter crimes gradually decrease over the years - Vehicle collision crimes decrease, although slowly and gradually - The rate of other crimes does not change
### NEIGHBOURHOOD ###
# Incident counts per time field, one line per NEIGHBOURHOOD. The titles name
# the neighbourhood grouping; they previously said "Crime Types", copied from
# the TYPE plots.

# Crime incidents by Neighbourhood by Year
ggplot(crime, aes(x = YEAR, colour = NEIGHBOURHOOD)) +
  geom_line(stat = "count") +
  theme_classic() +
  labs(
    title = "Numbers of Crimes by Neighbourhood by Year",
    x = "Year",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  )

# Crime incidents by Neighbourhood by Month
ggplot(crime, aes(x = MONTH, colour = NEIGHBOURHOOD)) +
  geom_line(stat = "count") +
  theme_classic() +
  labs(
    title = "Numbers of Crimes by Neighbourhood by Month",
    x = "Month",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  )

# Crime incidents by Neighbourhood by Day
ggplot(crime, aes(x = DAY, colour = NEIGHBOURHOOD)) +
  geom_line(stat = "count") +
  theme_classic() +
  labs(
    title = "Numbers of Crimes by Neighbourhood by Day",
    x = "Day",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  )

# Crime incidents by Neighbourhood by Hour
ggplot(crime, aes(x = HOUR, colour = NEIGHBOURHOOD)) +
  geom_line(stat = "count") +
  theme_classic() +
  labs(
    title = "Numbers of Crimes by Neighbourhood by Hour",
    x = "Hour",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  )
## Warning: Removed 54362 rows containing non-finite values (stat_count).
# Crime incidents by Neighbourhood by Minute
# One line per NEIGHBOURHOOD; the title names the neighbourhood grouping (it
# previously said "Crime Types", copied from the TYPE plots).
ggplot(crime, aes(x = MINUTE, colour = NEIGHBOURHOOD)) +
  geom_line(stat = "count") +
  theme_classic() +
  labs(
    title = "Numbers of Crimes by Neighbourhood by Minute",
    x = "Minute",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  )
## Warning: Removed 54362 rows containing non-finite values (stat_count).
- Split neighbourhoods into two groups
Group 1(Top 11 neighbourhoods with highest crime rate): Central Business District, West End, Fairview, Mount Pleasant, Grandview-Woodland, Renfrew-Collingwood, Kitsilano, Kensington-Cedar Cottage, Strathcona, Hastings-Sunrise, Sunset
Group 2(Others): Marpole, Riley Park, Victoria-Fraserview, Killarney, Oakridge, Dunbar-Southlands, Kerrisdale, Arbutus Ridge, West Point Grey, Shaughnessy, South Cambie, Stanley Park, Musqueam
# Create new neighbourhood variable
#crime <- mutate(crime, COMMUNITY = as.factor(
#ifelse(NEIGHBOURHOOD == c('Central Business District', 'West End', 'Fairview', 'Mount Pleasant', 'Grandview-Woodland', 'Renfrew-Collingwood', 'Kitsilano', 'Kensington-Cedar Cottage', 'Strathcona', 'Hastings-Sunrise', 'Sunset'), "Unsafe_comm", 'safe_comm' )))
#ifelse(NEIGHBOURHOOD == c('Marpole', 'Riley Park', 'Victoria-Fraserview', 'Killarney', 'Oakridge', 'Dunbar-Southlands', 'Kerrisdale', 'Arbutus Ridge', 'West Point Grey', 'Shaughnessy', 'South Cambie', 'Stanley Park', 'Musqueam'), 'safe_comm', "N/A"))))
# Incident totals per NEIGHBOURHOOD, largest first, then shown in the viewer
z <- crime %>%
  count(NEIGHBOURHOOD, sort = TRUE)
view(z)
Split neighbourhoods into two groups
56624 cases missing location information and they are labeled as “OFFSET TO PROTECT PRIVACY”
Group 1 (Top 8 neighbourhoods with the highest crime counts, number of crimes > 24000; there is a large drop of about 4000 between Kensington-Cedar Cottage and Strathcona): Central Business District, West End, Fairview, Mount Pleasant, Grandview-Woodland, Renfrew-Collingwood, Kitsilano, Kensington-Cedar Cottage
Group 2(Others): Strathcona, Hastings-Sunrise, Sunset, Marpole, Riley Park, Victoria-Fraserview, Killarney, Oakridge, Dunbar-Southlands, Kerrisdale, Arbutus Ridge, West Point Grey, Shaughnessy, South Cambie, Stanley Park, Musqueam
# Create new neighbourhood variable
#crime <- mutate(crime, COMMUNITY = as.factor(
# ifelse(NEIGHBOURHOOD %in% c('Central Business District', 'West End', 'Fairview', 'Mount Pleasant', 'Grandview-Woodland', 'Renfrew-Collingwood', 'Kitsilano', 'Kensington-Cedar Cottage', 'Strathcona', 'Hastings-Sunrise', 'Sunset'), "Unsafe_comm", 'safe_comm', ifelse(NEIGHBOURHOOD %in% c('Marpole', 'Riley Park', 'Victoria-Fraserview', 'Killarney', 'Oakridge', 'Dunbar-Southlands', 'Kerrisdale', 'Arbutus Ridge', 'West Point Grey', 'Shaughnessy', 'South Cambie', 'Stanley Park', 'Musqueam'), 'safe_comm', "N/A"))))
#crime$COMM <- ifelse(crime$NEIGHBOURHOOD %in% c('Central Business District', 'West End', 'Fairview', 'Mount Pleasant', 'Grandview-Woodland', 'Renfrew-Collingwood', 'Kitsilano', 'Kensington-Cedar Cottage', 'Strathcona', 'Hastings-Sunrise', 'Sunset'), "Unsafe_comm", ifelse(NEIGHBOURHOOD %in% c('Marpole', 'Riley Park', 'Victoria-Fraserview', 'Killarney', 'Oakridge', 'Dunbar-Southlands', 'Kerrisdale', 'Arbutus Ridge', 'West Point Grey', 'Shaughnessy', 'South Cambie', 'Stanley Park', 'Musqueam'), 'safe_comm', "N/A"))
# Label each incident by community group: "Unsafe_comm" for the eight listed
# neighbourhoods, "N/A" when NEIGHBOURHOOD is the empty string, and
# "Safe_comm" for everything else.
crime$COMM <- case_when(
  crime$NEIGHBOURHOOD %in% c(
    "Central Business District",
    "West End",
    "Fairview",
    "Mount Pleasant",
    "Grandview-Woodland",
    "Renfrew-Collingwood",
    "Kitsilano",
    "Kensington-Cedar Cottage"
  ) ~ "Unsafe_comm",
  crime$NEIGHBOURHOOD %in% "" ~ "N/A",
  TRUE ~ "Safe_comm"
)
#'Strathcona', 'Hastings-Sunrise', 'Sunset'
# Incident counts per time field, one line per COMM group.

# Crime incidents by Community by Year
crime %>%
  ggplot(aes(x = YEAR, colour = COMM)) +
  geom_line(stat = "count") +
  theme_classic() +
  labs(
    title = "Crime rates by Community by Year",
    x = "Year",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  )

# Crime incidents by Community by Month
crime %>%
  ggplot(aes(x = MONTH, colour = COMM)) +
  geom_line(stat = "count") +
  theme_classic() +
  labs(
    title = "Crime rates by Community by Month",
    x = "Month",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  )

# Crime incidents by Community by Day
crime %>%
  ggplot(aes(x = DAY, colour = COMM)) +
  geom_line(stat = "count") +
  theme_classic() +
  labs(
    title = "Crime rates by Community by Day",
    x = "Day",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  )

# Crime incidents by Community by Hour
crime %>%
  ggplot(aes(x = HOUR, colour = COMM)) +
  geom_line(stat = "count") +
  theme_classic() +
  labs(
    title = "Crime rates by Community by Hour",
    x = "Hour",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  )
## Warning: Removed 54362 rows containing non-finite values (stat_count).
# Crime incidents by Community by Minute
# Incident counts per MINUTE, one line per COMM group.
crime %>%
  ggplot(aes(x = MINUTE, colour = COMM)) +
  geom_line(stat = "count") +
  theme_classic() +
  labs(
    title = "Crime rates by Community by Minute",
    x = "Minute",
    y = "Number of Crimes"
  ) +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  )
## Warning: Removed 54362 rows containing non-finite values (stat_count).
# Share of each crime CLASS within the "Safe_comm" and "Unsafe_comm" groups.
# Rows with any other COMM label (the "N/A" rows) are dropped first. Because
# the bars are grouped by COMM, ..prop.. is the proportion within each group,
# and each group gets its own facet. The plot is stored, not printed.
final_graph1 <- ggplot(
  filter(crime, COMM %in% c("Safe_comm", "Unsafe_comm")),
  aes(x = CLASS, group = COMM)
) +
  geom_bar(aes(y = ..prop.., fill = factor(..x..)), stat = "count") +
  # Percentage label sitting on top of each bar
  geom_text(
    aes(label = scales::percent(..prop..), y = ..prop..),
    stat = "count",
    vjust = 0
  ) +
  facet_grid(~COMM) +
  scale_y_continuous(labels = percent) +
  labs(y = "Percent", fill = "CLASS") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title = element_text(colour = "blue")
  )
#ggsave("Theft_unsafe_comm.png", plot = final_graph1)
#group_by(Company, flavor) %>% summarise(medPrice = median(price, na.rm=FALSE)) %>%
# ggplot(aes(x = flavor, y=medPrice, fill = Company)) + geom_histogram(position = "dodge", stat = "identity")
#crime %>% group_by(MONTH) %>% summarize(mean_MONTH = mean(MONTH, na.rm =FALSE)) %>% ggplot(x = YEAR, y = mean_MONTH) + geom_line(stat = 'count')
# Four panels of yearly incident counts under one shared heading:
#   1. all crimes
#   2. all crimes except the Theft class
#   3. the Theft class only (with a small in-plot legend)
#   4. the non-Theft classes, one line per class (with a small in-plot legend)
grid.arrange(
  # 1. every incident
  ggplot(crime, aes(x = YEAR)) +
    geom_line(stat = "count") +
    ylim(0, 50000) +
    theme_classic() +
    labs(title = "Total Annual Crime Rate", y = "Number of Crimes", x = "") +
    theme(
      plot.title = element_text(size = 10, colour = "blue"),
      axis.title = element_text(size = 10)
    ),
  # 2. everything that is not Theft, as a single line
  ggplot(filter(crime, !CLASS %in% "Theft"), aes(x = YEAR)) +
    geom_line(stat = "count") +
    ylim(0, 50000) +
    theme_classic() +
    labs(
      title = "Annual Crime Rate Without Theft",
      y = "Number of Crimes",
      x = ""
    ) +
    theme(
      plot.title = element_text(size = 10, colour = "red"),
      axis.title = element_text(size = 10)
    ),
  # 3. Theft only; legend tucked into the top-right corner of the panel
  ggplot(
    filter(crime, CLASS %in% "Theft"),
    aes(x = YEAR, colour = CLASS, fill = CLASS)
  ) +
    geom_line(stat = "count") +
    ylim(0, 50000) +
    theme_classic() +
    labs(title = "Theft Annual Crime Rate", y = "Number of Crimes", x = "") +
    theme(
      legend.position = c(.99, .99),
      legend.justification = c("right", "top"),
      legend.box.just = "right",
      legend.margin = margin(1, 1, 1, 1),
      legend.title = element_text(size = 5),
      legend.text = element_text(size = 5),
      legend.key.size = unit(0.05, "cm"),
      plot.title = element_text(size = 10, colour = "blue"),
      axis.title = element_text(size = 10)
    ),
  # 4. non-Theft classes drawn separately, on a tighter y range
  ggplot(
    filter(crime, !CLASS %in% "Theft"),
    aes(x = YEAR, colour = CLASS, fill = CLASS)
  ) +
    geom_line(stat = "count") +
    ylim(0, 30000) +
    theme_classic() +
    labs(
      title = "Annual Crime Rate of Other Crime Classes",
      y = "Number of Crimes",
      x = ""
    ) +
    theme(
      legend.position = c(.99, .99),
      legend.justification = c("right", "top"),
      legend.box.just = "right",
      legend.margin = margin(1, 1, 1, 1),
      legend.title = element_text(size = 5),
      legend.text = element_text(size = 5),
      legend.key.size = unit(0.05, "cm"),
      plot.title = element_text(size = 10, colour = "red"),
      axis.title = element_text(size = 10)
    ),
  top = "Annual Crime Rate Changes Over Years: Theft vs. Others"
)
#ggsave("Theft_most_impactable.png", plot = final_graph2)
# Label each incident as theft / non-theft based on its TYPE.
crime$Tft <- ifelse(
  crime$TYPE %in% c(
    "Theft from Vehicle", "Theft of Bicycle", "Theft of Vehicle", "Other Theft"
  ),
  "Theft_Crime",
  "Non_Theft_Crime"
)
# Label each incident by whether its NEIGHBOURHOOD is in the fixed list below.
crime$danger <- ifelse(
  crime$NEIGHBOURHOOD %in% c(
    "Central Business District", "West End", "Fairview", "Mount Pleasant",
    "Grandview-Woodland", "Renfrew-Collingwood", "Kitsilano",
    "Kensington-Cedar Cottage"
  ),
  "Dangerous_comm",
  "Undangerous_comm"
)
# Copy of the data without rows whose COMM value is the string "N/A".
crime_test <- filter(crime, !(COMM %in% "N/A"))
# Cross-tabulation computed on the full `crime` data (not on `crime_test`).
table(crime$Tft, crime$danger)
##
## Dangerous_comm Undangerous_comm
## Non_Theft_Crime 114396 127241
## Theft_Crime 206181 82834
# Stacked 100% bars: share of theft vs non-theft within each community group.
ggplot(crime_test, aes(x = danger, fill = Tft)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = percent) +
  theme_classic() +
  theme(
    axis.title = element_text(colour = "dark green"),
    title = element_text(colour = "blue")
  ) +
  labs(
    title = "Proportion of Theft Crimes in Safe Community vs Unsafe Community",
    x = "",
    y = "Percentage"
  )
### Proportion test
# Two-sample test of equal proportions on the 2x2 table, without continuity
# correction.
prop.test(table(crime_test$Tft, crime_test$danger), correct = FALSE)
##
## 2-sample test for equality of proportions without continuity
## correction
##
## data: table(crime_test$Tft, crime_test$danger)
## X-squared = 5877.3, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.1093533 -0.1038444
## sample estimates:
## prop 1 prop 2
## 0.6117368 0.7183356
# Number of incidents per year, with 2017 excluded.
df_sum <- crime %>%
  filter(!YEAR %in% '2017') %>%
  group_by(YEAR) %>%
  tally()
# Yearly totals with a fitted linear trend (red) and its confidence band.
ggplot(df_sum, aes(x = YEAR, y = n)) +
  geom_line() +
  geom_smooth(method = 'lm', color = 'red') +
  theme_classic() +
  labs(
    title = "Number of Crimes vs Year",
    subtitle = "Number of crime incidents goes down as year goes by",
    x = "Year",
    y = "Number of Crimes"
  ) +
  theme(axis.title = element_text(colour = "dark green"), title = element_text(colour = "blue"))
# Test the yearly trend with the same linear model that geom_smooth() draws:
# the YEAR coefficient and its p-value say whether the counts change with time.
# The earlier call, t.test(df_sum$YEAR, df_sum$n), compared the mean of the
# year numbers with the mean of the counts, which says nothing about a trend.
summary(lm(n ~ YEAR, data = df_sum))
## NOTE: re-knit the document to regenerate the printed output for this model;
## the Welch t-test output previously shown here no longer applies.
# Yearly incident counts for theft and for all other classes (2017 excluded).
df_sum_Theft <- crime %>%
  filter(CLASS %in% 'Theft') %>%
  filter(!YEAR %in% '2017') %>%
  group_by(YEAR) %>%
  tally()
df_sum_not_Theft <- crime %>%
  filter(!CLASS %in% 'Theft') %>%
  filter(!YEAR %in% '2017') %>%
  group_by(YEAR) %>%
  tally()
# Linear trend of theft incidents per year.
ggplot(df_sum_Theft, aes(x = YEAR, y = n)) +
  geom_smooth(method = 'lm') +
  ylim(10000, 30000) +
  theme_classic() +
  labs(
    title = "Number of Theft Crimes vs Year",
    subtitle = "Number of Theft crime incidents declines significantly as year passes",
    x = "Year",
    y = "Number of Theft Crimes"
  ) +
  theme(axis.title = element_text(colour = "dark green"), title = element_text(colour = "blue"))
# Linear trend of non-theft incidents per year. The labels used to be copied
# from the theft plot above; they now describe the data actually plotted.
ggplot(df_sum_not_Theft, aes(x = YEAR, y = n)) +
  geom_smooth(method = 'lm') +
  ylim(10000, 30000) +
  theme_classic() +
  labs(
    title = "Number of Non-Theft Crimes vs Year",
    subtitle = "Number of Non-Theft crime incidents goes down at moderate degree as year passes",
    x = "Year",
    y = "Number of Non-Theft Crimes"
  ) +
  theme(axis.title = element_text(colour = "dark green"), title = element_text(colour = "blue"))
#Total_crime <- with(df_sum$YEAR, df_sum$n)
#Theft_crime <- with(df_sum$YEAR, df_sum$n)
# Paired by year: total incidents vs theft incidents.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
t.test(df_sum$n, df_sum_Theft$n, paired = TRUE, conf.level = 0.95)
##
## Paired t-test
##
## data: df_sum$n and df_sum_Theft$n
## t = 23.283, df = 13, p-value = 5.537e-12
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 15186.03 18292.40
## sample estimates:
## mean of the differences
## 16739.21
# Paired by year: total incidents vs non-theft incidents.
t.test(df_sum$n, df_sum_not_Theft$n, paired = TRUE, conf.level = 0.95)
##
## Paired t-test
##
## data: df_sum$n and df_sum_not_Theft$n
## t = 15.187, df = 13, p-value = 1.184e-09
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 17059.81 22718.19
## sample estimates:
## mean of the differences
## 19889
#crime_n <- crime %>% filter(HUNDRED_BLOCK %in% "OFFSET TO PROTECT PRIVACY")
## Theft from Vehicle Map of Year 2003
# Produces, for the 2003 "Theft from Vehicle" incidents:
#   - data1: rotated lon/lat matrix, and a / k: its 2-D kernel density,
#   - coords1: pixel coordinates on a static map tile, plotted as points.
crime_tv_2003 <- crime %>% filter(YEAR %in% "2003") %>% filter(TYPE %in% "Theft from Vehicle")
# Read in the data
rawdata1 <- data.frame(as.numeric(crime_tv_2003$Longitude), as.numeric(crime_tv_2003$Latitude))
names(rawdata1) <- c("lon", "lat")
data1 <- as.matrix(rawdata1)
# Rotate the lat-lon coordinates using a rotation matrix
# Trial and error lead to pi/15.0 = 12 degrees
theta <- pi / 15.0
m <- matrix(c(cos(theta), sin(theta), -sin(theta), cos(theta)), nrow = 2)
data1 <- data1 %*% m
# Reproduce William's original map
#par(bg='black')
#plot(data, cex=0.1, col="white", pch=16)
# Create heatmap with kde2d and overplot
# The density is estimated once and shared by both names; it used to be
# computed twice with identical arguments.
k <- kde2d(data1[, 1], data1[, 2], n = 500)
a <- k
# Intensity from green to red
cols <- rev(colorRampPalette(brewer.pal(8, 'BrBG'))(10000))
#par(bg='white')
#image(k, col=cols, xaxt='n', yaxt='n')
#points(data1, cex=0.1, pch=16)
# Mapping via RgoogleMaps
# Find map center and get map
# vapply() pins the result to one number per column; rev() turns the
# (lon, lat) order into (lat, lon).
center <- rev(vapply(rawdata1, mean, numeric(1)))
# One zoom level for both fetching the tile and converting coordinates.
# The conversion used to be done with zoom 11 on a zoom-12 map, which places
# the points at the wrong scale relative to the tile.
map_zoom <- 12
map <- GetMap(center = center, zoom = map_zoom)
## sleptTotal= 0
# Translate original data
coords1 <- LatLon2XY.centered(map, rawdata1$lat, rawdata1$lon, map_zoom)
coords1 <- data.frame(coords1)
# Rerun heatmap
#k2 <- kde2d(coords$newX, coords$newY, n=500)
# Create exponential transparency vector and add
alpha <- seq.int(0.5, 0.95, length.out = 100)
alpha <- exp(alpha^6 - 1)
cols2 <- AddAlpha(cols, alpha)
# Plot
PlotOnStaticMap(map)
## [1] "Caution: map type is OpenStreetMap. Until we find the correct projection algorithm, we treat lat/lon like planar coordinates and set TrueProj = FALSE."
#image(k2, col=cols2, add=T)
points(coords1$newX, coords1$newY, pch = 16, cex = 0.3)
#-----------------------------------------------------#
# Produces, for the 2011 "Theft from Vehicle" incidents:
#   - data2: rotated lon/lat matrix, and b / k: its 2-D kernel density,
#   - coords2: pixel coordinates on a static map tile, plotted as points.
crime_tv_2011 <- crime %>% filter(YEAR %in% "2011") %>% filter(TYPE %in% "Theft from Vehicle")
# Read in the data
rawdata2 <- data.frame(as.numeric(crime_tv_2011$Longitude), as.numeric(crime_tv_2011$Latitude))
names(rawdata2) <- c("lon", "lat")
data2 <- as.matrix(rawdata2)
# Rotate the lat-lon coordinates using a rotation matrix
# Trial and error lead to pi/15.0 = 12 degrees
theta <- pi / 15.0
m <- matrix(c(cos(theta), sin(theta), -sin(theta), cos(theta)), nrow = 2)
data2 <- data2 %*% m
# Reproduce William's original map
#par(bg='black')
#plot(data, cex=0.1, col="white", pch=16)
# Create heatmap with kde2d and overplot
# The density is estimated once and shared by both names; it used to be
# computed twice with identical arguments.
k <- kde2d(data2[, 1], data2[, 2], n = 500)
b <- k
# Intensity from green to red
cols <- rev(colorRampPalette(brewer.pal(8, 'BrBG'))(10000))
#par(bg='white')
#image(k, col=cols, xaxt='n', yaxt='n')
#points(data2, cex=0.1, pch=16)
# Mapping via RgoogleMaps
# Find map center and get map
# vapply() pins the result to one number per column; rev() turns the
# (lon, lat) order into (lat, lon).
center <- rev(vapply(rawdata2, mean, numeric(1)))
# One zoom level for both fetching the tile and converting coordinates.
# The conversion used to be done with zoom 11 on a zoom-12 map, which places
# the points at the wrong scale relative to the tile.
map_zoom <- 12
map <- GetMap(center = center, zoom = map_zoom)
## sleptTotal= 0
# Translate original data
coords2 <- LatLon2XY.centered(map, rawdata2$lat, rawdata2$lon, map_zoom)
coords2 <- data.frame(coords2)
# Rerun heatmap
#k2 <- kde2d(coords$newX, coords$newY, n=500)
# Create exponential transparency vector and add
alpha <- seq.int(0.5, 0.95, length.out = 100)
alpha <- exp(alpha^6 - 1)
cols2 <- AddAlpha(cols, alpha)
# Plot
PlotOnStaticMap(map)
## [1] "Caution: map type is OpenStreetMap. Until we find the correct projection algorithm, we treat lat/lon like planar coordinates and set TrueProj = FALSE."
#image(k2, col=cols2, add=T)
points(coords2$newX, coords2$newY, pch = 16, cex = 0.3)
#------------------------------------------------#
# Produces, for the 2016 "Theft from Vehicle" incidents:
#   - data3: rotated lon/lat matrix, and c / k: its 2-D kernel density,
#   - coords3: pixel coordinates on a static map tile, plotted as points.
crime_tv_2016 <- crime %>% filter(YEAR %in% "2016") %>% filter(TYPE %in% "Theft from Vehicle")
# Read in the data
rawdata3 <- data.frame(as.numeric(crime_tv_2016$Longitude), as.numeric(crime_tv_2016$Latitude))
names(rawdata3) <- c("lon", "lat")
data3 <- as.matrix(rawdata3)
# Rotate the lat-lon coordinates using a rotation matrix
# Trial and error lead to pi/15.0 = 12 degrees
theta <- pi / 15.0
m <- matrix(c(cos(theta), sin(theta), -sin(theta), cos(theta)), nrow = 2)
data3 <- data3 %*% m
# Reproduce William's original map
#par(bg='black')
#plot(data, cex=0.1, col="white", pch=16)
# Create heatmap with kde2d and overplot
# The density is estimated once and shared by both names; it used to be
# computed twice with identical arguments.
k <- kde2d(data3[, 1], data3[, 2], n = 500)
# NOTE(review): a variable named `c` shadows base::c for non-call lookups.
# The name is kept because the later image(c, ...) call reads it.
c <- k
# Intensity from green to red
cols <- rev(colorRampPalette(brewer.pal(8, 'BrBG'))(10000))
#par(bg='white')
#image(k, col=cols, xaxt='n', yaxt='n')
#points(data3, cex=0.1, pch=16)
# Mapping via RgoogleMaps
# Find map center and get map
# vapply() pins the result to one number per column; rev() turns the
# (lon, lat) order into (lat, lon).
center <- rev(vapply(rawdata3, mean, numeric(1)))
# One zoom level for both fetching the tile and converting coordinates.
# The conversion used to be done with zoom 11 on a zoom-12 map, which places
# the points at the wrong scale relative to the tile.
map_zoom <- 12
map <- GetMap(center = center, zoom = map_zoom)
## sleptTotal= 0
# Translate original data
coords3 <- LatLon2XY.centered(map, rawdata3$lat, rawdata3$lon, map_zoom)
coords3 <- data.frame(coords3)
# Rerun heatmap
#k2 <- kde2d(coords$newX, coords$newY, n=500)
# Create exponential transparency vector and add
alpha <- seq.int(0.5, 0.95, length.out = 100)
alpha <- exp(alpha^6 - 1)
cols2 <- AddAlpha(cols, alpha)
# Plot
PlotOnStaticMap(map)
## [1] "Caution: map type is OpenStreetMap. Until we find the correct projection algorithm, we treat lat/lon like planar coordinates and set TrueProj = FALSE."
#image(k2, col=cols2, add=T)
points(coords3$newX, coords3$newY, pch = 16, cex = 0.3)
# Density heat maps for 2003 (a), 2011 (b) and 2016 (c), each with its rotated
# incident points drawn on top.
# image() and points() are base-graphics calls issued one after the other.
# Chaining them with `+` (ggplot2 style) only evaluated NULL + NULL and
# printed a stray "integer(0)" after every plot.
image(a, col = cols, xaxt = 'n', yaxt = 'n')
points(data1, cex = 0.1, pch = 16)
image(b, col = cols, xaxt = 'n', yaxt = 'n')
points(data2, cex = 0.1, pch = 16)
image(c, col = cols, xaxt = 'n', yaxt = 'n')
points(data3, cex = 0.1, pch = 16)
Because of the large volume of incidents, I present three maps (2003, 2011, and 2016) to show how the crime rate for Theft from Vehicle changed over the years.
Theft from Vehicle plays a major role in the annual crime trend. Year 2003 is the first year of the survey, 2011 is the year with the fewest crime incidents, and 2016 is the most recent year with a full year of data.
The graphs show the overall crime rate trend over the years, so we can see how the crime rate has changed.
As the maps show, most crime incidents occur in downtown, the downtown east side, and the region south of the downtown vicinity.